# cd /projects/data_commons/cw_code/
# bash zb_r.sh "2" "16" "cw_geo_state_abb.R" "" ""

# Arguments supplied after the script name on the command line.
sys_args <- commandArgs(trailingOnly = TRUE)
# Batch flag: 0 when R runs interactively, 1 otherwise.
sys_batch <- if (interactive()) 0 else 1
# Log the modification time of the script file located via the qpath/qscript
# environment variables (presumably exported by the launcher -- confirm).
# file.mtime() yields NA when the file cannot be found.
print(paste0(
  "Last Modified: ",
  file.mtime(file.path(Sys.getenv("qpath"), Sys.getenv("qscript")))
))

print(sys_args)

# Project root; all relative input/output paths below resolve against it.
dir_proj <- "/projects/data_commons/cw/"

library(data.table)

print(paste0("Started at ", Sys.time()))
print(sessionInfo())
setwd(dir_proj)

#===============================================================================

# Lookup of state abbreviations and upper-cased state names, built from R's
# built-in state.abb / state.name vectors.
dt_st <- data.table(st = state.abb, st_str = toupper(state.name))

# Load the MSA crosswalk and keep the rows with msa < 60 (per the original
# note, these carry the MSA codes for residual states).
dt_msa <- data.table(haven::read_dta("raw/msa_crosswalk.dta"))
setnames(dt_msa, c("name_msa"), c("msa_str"))
dt_out <- dt_msa[msa < 60, ]

# Diagnostic: crosswalk rows whose name matches no state name. This must be
# computed BEFORE the merge: the merge below is an inner join, so unmatched
# rows are dropped and a post-merge test on `st` can never flag anything.
# NOTE(review): state.name holds the 50 states only, so a DC or territory row
# in the crosswalk would be reported here -- confirm that is expected.
dt_dev <- dt_out[!(msa_str %in% dt_st$st_str), ]
print(nrow(dt_dev))
if (nrow(dt_dev) > 0L) {
  # Show which names were dropped so the log is actionable.
  print(unique(dt_dev$msa_str))
}

# Attach the state abbreviation by matching the crosswalk name to the
# upper-cased state name (inner join: unmatched rows are excluded).
dt_out <- merge(dt_out, dt_st, by.x = c("msa_str"), by.y = c("st_str"))

# Keep one obs per state
setnames(dt_out, c("msa", "msa_str"), c("st_msa", "st_str"))
dt_out <- unique(dt_out[, .SD, .SDcols = c("st", "st_str", "st_msa")])
# REDACTED:
# An update is made to the msa variable for some instance
setorderv(dt_out, c("st_msa"))

# Write the state crosswalk to the working directory in three formats:
# Stata (.dta), R serialized object (.rds) and CSV.
haven::write_dta(data = dt_out, path = "cw_geo_state.dta")
saveRDS(object = dt_out, file = "cw_geo_state.rds")
fwrite(x = dt_out, file = "cw_geo_state.csv")

# Log the completion time.
print(paste0("Ended at ", Sys.time()))
# End of R Script